{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# note\n",
    "* Didn't mean to generalize NN here. Just plow through this `400>25>10` setup to get the feeling of NN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%reload_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import sys\n",
    "sys.path.append('..')\n",
    "\n",
    "from helper import nn\n",
    "from helper import logistic_regression as lr\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# prepare data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(5000, 401)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_raw, y_raw = nn.load_data('ex4data1.mat', transpose=False)\n",
    "X = np.insert(X_raw, 0, np.ones(X_raw.shape[0]), axis=1)\n",
    "X.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "***"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([10, 10, 10, ...,  9,  9,  9], dtype=uint8)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_raw"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.,  0.,  0., ...,  0.,  0.,  1.],\n",
       "       [ 0.,  0.,  0., ...,  0.,  0.,  1.],\n",
       "       [ 0.,  0.,  0., ...,  0.,  0.,  1.],\n",
       "       ..., \n",
       "       [ 0.,  0.,  0., ...,  0.,  1.,  0.],\n",
       "       [ 0.,  0.,  0., ...,  0.,  1.,  0.],\n",
       "       [ 0.,  0.,  0., ...,  0.,  1.,  0.]])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = nn.expand_y(y_raw)\n",
    "y"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# load weight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((25, 401), (10, 26))"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t1, t2 = nn.load_weight('ex4weights.mat')\n",
    "t1.shape, t2.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(10285,)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "theta = nn.serialize(t1, t2)  # flatten params\n",
    "theta.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# feed forward\n",
    "> (400 + 1) -> (25 + 1) -> (10)\n",
    "\n",
    "<img style=\"float: left;\" src=\"../img/nn_model.png\">"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[  1.12661530e-04,   1.74127856e-03,   2.52696959e-03, ...,\n",
       "          4.01468105e-04,   6.48072305e-03,   9.95734012e-01],\n",
       "       [  4.79026796e-04,   2.41495958e-03,   3.44755685e-03, ...,\n",
       "          2.39107046e-03,   1.97025086e-03,   9.95696931e-01],\n",
       "       [  8.85702310e-05,   3.24266731e-03,   2.55419797e-02, ...,\n",
       "          6.22892325e-02,   5.49803551e-03,   9.28008397e-01],\n",
       "       ..., \n",
       "       [  5.17641791e-02,   3.81715020e-03,   2.96297510e-02, ...,\n",
       "          2.15667361e-03,   6.49826950e-01,   2.42384687e-05],\n",
       "       [  8.30631310e-04,   6.22003774e-04,   3.14518512e-04, ...,\n",
       "          1.19366192e-02,   9.71410499e-01,   2.06173648e-04],\n",
       "       [  4.81465717e-05,   4.58821829e-04,   2.15146201e-05, ...,\n",
       "          5.73434571e-03,   6.96288990e-01,   8.18576980e-02]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "_, _, _, _, h = nn.feed_forward(theta, X)\n",
    "h # 5000*10"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# cost function\n",
    "<img style=\"float: left;\" src=\"../img/nn_cost.png\">"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "think about this, now we have $y$ and $h_{\\theta} \\in R^{5000 \\times 10}$  \n",
    "If you just ignore the m and k dimention, pairwisely this computation is trivial.  \n",
    "the eqation $= y*log(h_{\\theta}) - (1-y)*log(1-h_{\\theta})$  \n",
    "all you need to do after pairwise computation is sums this 2d array up and divided by m"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.28762916516131892"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.cost(theta, X, y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# regularized cost function\n",
    "<img style=\"float: left;\" src=\"../img/nn_regcost.png\">"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "the first column of t1 and t2 is intercept $\\theta$, just forget them when you do regularization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.38376985909092365"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.regularized_cost(theta, X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
